<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <meta name="author" content="PG-Strom Development Team">
  <link rel="shortcut icon" href="../img/favicon.ico">
  <title>PL/CUDA - PG-Strom Manual</title>
  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>

  <link rel="stylesheet" href="../css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../css/theme_extra.css" type="text/css" />
  <link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css">
  <link href="//fonts.googleapis.com/earlyaccess/notosansjp.css" rel="stylesheet">
  <link href="//fonts.googleapis.com/css?family=Open+Sans:600,800" rel="stylesheet">
  <link href="../custom.css" rel="stylesheet">
  
  <script>
    // Current page data
    var mkdocs_page_name = "PL/CUDA";
    var mkdocs_page_input_path = "plcuda.md";
    var mkdocs_page_url = null;
  </script>
  
  <script src="../js/jquery-2.1.1.min.js" defer></script>
  <script src="../js/modernizr-2.8.3.min.js" defer></script>
  <script src="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
  <script>hljs.initHighlightingOnLoad();</script> 
  
</head>

<body class="wy-body-for-nav" role="document">

  <div class="wy-grid-for-nav">

    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
      <div class="wy-side-nav-search">
        <a href=".." class="icon icon-home"> PG-Strom Manual</a>
        <div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
  </form>
  [<a href="../ja/plcuda/" style="color: #cccccc">Japanese</a> | <strong>English</strong>]
</div>
      </div>

      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
	<ul class="current">
	  
          
            <li class="toctree-l1">
		
    <a class="" href="..">Home</a>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../install/">Install</a>
	    </li>
          
            <li class="toctree-l1">
		
    <span class="caption-text">Tutorial</span>
    <ul class="subnav">
                <li class="">
                    
    <a class="" href="../operations/">Basic Operations</a>
                </li>
                <li class="">
                    
    <a class="" href="../sys_admin/">System Administration</a>
                </li>
                <li class="">
                    
    <a class="" href="../brin/">Index Support</a>
                </li>
                <li class="">
                    
    <a class="" href="../partition/">Partitioning</a>
                </li>
                <li class="">
                    
    <a class="" href="../troubles/">Trouble Shooting</a>
                </li>
    </ul>
	    </li>
          
            <li class="toctree-l1">
		
    <span class="caption-text">Advanced Features</span>
    <ul class="subnav">
                <li class="">
                    
    <a class="" href="../ssd2gpu/">SSD2GPU Direct SQL</a>
                </li>
                <li class="">
                    
    <a class="" href="../arrow_fdw/">Arrow_fdw</a>
                </li>
                <li class="">
                    
    <a class="" href="../gstore_fdw/">Gstore_fdw</a>
                </li>
                <li class=" current">
                    
    <a class="current" href="./">PL/CUDA</a>
    <ul class="subnav">
            
    <li class="toctree-l3"><a href="#plcuda-overview">PL/CUDA Overview</a></li>
    

    <li class="toctree-l3"><a href="#plcuda-structure">PL/CUDA Structure</a></li>
    

    <li class="toctree-l3"><a href="#plcuda-references">PL/CUDA References</a></li>
    
        <ul>
        
            <li><a class="toctree-l4" href="#advantage-and-disadvantage-of-plcuda">Advantage and disadvantage of PL/CUDA</a></li>
        
            <li><a class="toctree-l4" href="#plcuda-directives">PL/CUDA Directives</a></li>
        
            <li><a class="toctree-l4" href="#plcuda-related-functions">PL/CUDA Related Functions</a></li>
        
        </ul>
    

    </ul>
                </li>
    </ul>
	    </li>
          
            <li class="toctree-l1">
		
    <span class="caption-text">References</span>
    <ul class="subnav">
                <li class="">
                    
    <a class="" href="../ref_types/">Data Types</a>
                </li>
                <li class="">
                    
    <a class="" href="../ref_devfuncs/">Functions and Operators</a>
                </li>
                <li class="">
                    
    <a class="" href="../ref_sqlfuncs/">SQL Objects</a>
                </li>
                <li class="">
                    
    <a class="" href="../ref_params/">GUC Parameters</a>
                </li>
    </ul>
	    </li>
          
            <li class="toctree-l1">
		
    <a class="" href="../release_note/">Release Note</a>
	    </li>
          
        </ul>
      </div>
      &nbsp;
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
        <a href="..">PG-Strom Manual</a>
      </nav>

      
      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
    <li><a href="..">Docs</a> &raquo;</li>
    
      
        
          <li>Advanced Features &raquo;</li>
        
      
    
    <li>PL/CUDA</li>
    <li class="wy-breadcrumbs-aside">
      
    </li>
  </ul>
  <hr/>
</div>
          <div role="main">
            <div class="section">
              
                <p>This chapter introduces the way to implement GPU executable native program as SQL functions, using PL/CUDA procedural language.</p>
<h1 id="plcuda-overview">PL/CUDA Overview</h1>
<p>PG-Strom internally constructs GPU programs by CUDA language, according to the supplied SQL, then generates GPU's native binary using just-in-time compile. CUDA is a programming environment provided by NVIDIA. It allows implementing parallel program which is executable on GPU device, using C-like statement. This transformation process from SQL statement to CUDA program is an internal process, thus, no need to pay attention what GPU programs are generated and executed from the standpoint of users.</p>
<p>On the other hand, PostgreSQL allows adding programming languages to implement SQL functions by the <code>CREATE LANGUAGE</code> statement. PL/CUDA is a language handler that supports the <code>CREATE LANGUAGE</code> command. It allows users to run not only the GPU programs automatically generated by PG-Strom based on SQL, but also arbitrary GPU programs manually implemented as SQL functions.</p>
<p>Its arguments can take the data types supported by PG-Strom, like numeric, text, or the array-matrix data type. These arguments are implicitly loaded onto GPU device memory by the PL/CUDA infrastructure, so users don't need to pay attention to data loading between the database and GPU devices. In a similar fashion, the return value of a PL/CUDA function (including the case of a variable length data type) will be written back to the CPU from the GPU, then decoded into the result of the SQL function.</p>
<p>You can also use foreign tables defined with <code>gstore_fdw</code> as arguments of PL/CUDA function. In this case, no need to load the data onto GPU for each invocation because foreign table already keeps the data, and available to use larger data than 1GB which is a restriction of variable length data in PostgreSQL.</p>
<p>Therefore, users can focus on productive tasks like implementation of statistical analysis, code optimization and so on, without routine process like data input/output between GPU and databases.</p>
<p><img alt="PL/CUDA Overview" src="../img/plcuda-overview.png" /></p>
<p>Once a PL/CUDA function is declared using <code>CREATE FUNCTION</code>, it generates a CUDA program source code that embeds the definition of this function, then build it for the target GPU device.
This CUDA program is almost identical to usual GPU software based on CUDA runtime, except for the auxiliary code to receive arguments of SQL function and to write back its results. It also allows to include/link some libraries for CUDA device runtime.</p>
<p>Native CUDA programs implemented by PL/CUDA are executed as child-processes of PostgreSQL backend.
Therefore, it has independent address space and OS/GPU resources from PostgreSQL.
CUDA program contains host code for the host system and device code to be executed on GPU devices.
The host code can execute any logic we can program using C-language, so we restrict only database superuser can define PL/CUDA function from the standpoint of security.</p>
<p>Below is an example of simple PL/CUDA function. This function takes two same length <code>real[]</code> array as arguments, then returns its dot product in <code>float</code> data type.</p>
<pre><code>CREATE OR REPLACE FUNCTION
gpu_dot_product(real[], real[])
RETURNS float
AS $$
#plcuda_decl
#include &quot;cuda_matrix.h&quot;

KERNEL_FUNCTION_MAXTHREADS(void)
gpu_dot_product(double *p_dot,
                VectorTypeFloat *X,
                VectorTypeFloat *Y)
{
    size_t      index = get_global_id();
    size_t      nitems = X-&gt;height;
    float       v[MAXTHREADS_PER_BLOCK];
    float       sum;

    if (index &lt; nitems)
        v[get_local_id()] = X-&gt;values[index] * Y-&gt;values[index];
    else
        v[get_local_id()] = 0.0;

    sum = pgstromTotalSum(v, MAXTHREADS_PER_BLOCK);
    if (get_local_id() == 0)
        atomicAdd(p_dot, (double)sum);
    __syncthreads();
}
#plcuda_begin
{
    size_t      nitems;
    int         blockSz;
    int         gridSz;
    double     *dot;
    cudaError_t rc;

    if (!VALIDATE_ARRAY_VECTOR_TYPE_STRICT(arg1, PG_FLOAT4OID) ||
        !VALIDATE_ARRAY_VECTOR_TYPE_STRICT(arg2, PG_FLOAT4OID))
        EEXIT(&quot;arguments are not vector like array&quot;);
    nitems = ARRAY_VECTOR_HEIGHT(arg1);
    if (nitems != ARRAY_VECTOR_HEIGHT(arg2))
        EEXIT(&quot;length of arguments mismatch&quot;);

    rc = cudaMallocManaged(&amp;dot, sizeof(double));
    if (rc != cudaSuccess)
        CUEXIT(rc, &quot;failed on cudaMallocManaged&quot;);
    memset(dot, 0, sizeof(double));

    blockSz = MAXTHREADS_PER_BLOCK;
    gridSz = (nitems + MAXTHREADS_PER_BLOCK - 1) / MAXTHREADS_PER_BLOCK;
    gpu_dot_product&lt;&lt;&lt;gridSz,blockSz&gt;&gt;&gt;(dot,
                                        (VectorTypeFloat *)arg1,
                                        (VectorTypeFloat *)arg2);
    rc = cudaStreamSynchronize(NULL);
    if (rc != cudaSuccess)
        CUEXIT(rc, &quot;failed on cudaStreamSynchronize&quot;);

    return *dot;
}
#plcuda_end
$$ LANGUAGE 'plcuda';
</code></pre>

<p>PL/CUDA infrastructure makes entrypoint function of CUDA program by the block between <code>#plcuda_begin</code> and <code>#plcuda_end</code> with extra code to exchange arguments of SQL function.
The portion enclosed by <code>#plcuda_decl</code> and <code>#plcuda_begin</code> is a block for declaration of GPU device functions and other host functions. It is placed prior to the entrypoint above.</p>
<p>At the entrypoint of the CUDA program, you can refer to the arguments of the SQL function using <code>arg1</code>, <code>arg2</code>, and so on.</p>
<p>In the above example, the <code>arg1</code> and <code>arg2</code>, <code>real[]</code> array type, are passed to the entrypoint, then <code>VALIDATE_ARRAY_VECTOR_TYPE_STRICT</code> macro checks whether it is 1-dimensional array of 32bit floating-point values without NULL.</p>
<p>Ditto with return value, the entrypoint returns a value in CUDA C representation corresponding to the SQL data type.
If entrypoint does not return any value (or, it exits the program with status code 1 by <code>exit()</code>), it is considered PL/CUDA function returns <code>NULL</code>. </p>
<p>The above sample program validates the array of <code>real</code> values passed from SQL function, then it allocates the result buffer by <code>cudaMallocManaged</code>, and invokes <code>gpu_dot_product</code>, a GPU kernel function, to compute dot product with two vectors.</p>
<p>The result of this function is below. It computes the dot product of two vectors which contain 10,000 items randomly generated.</p>
<pre><code>postgres=# SELECT gpu_dot_product(array_matrix(random()::real),
                                  array_matrix(random()::real))
             FROM generate_series(1,10000);
 gpu_dot_product
------------------
 3.71461999509484
(1 row)
</code></pre>

<h1 id="plcuda-structure">PL/CUDA Structure</h1>
<p>Function declaration of PL/CUDA consists of two code blocks split by the directives <code>#plcuda_decl</code>, <code>#plcuda_begin</code> and <code>#plcuda_end</code>. Users can put their custom code in the code blocks according to the purpose, then the PL/CUDA language handler reconstructs them into a single source file with extra logic to exchange function arguments and results.</p>
<pre><code>#plcuda_decl
  [...any declarations...]
#plcuda_begin
  [...host code in the entrypoint...]
#plcuda_end
</code></pre>

<p>The code block, begins from <code>#plcuda_decl</code>, can have declaration of <code>__host__</code> and <code>__device__</code> functions and variables for CUDA C.
This code block locates in front of the entrypoint function which contains the code block between <code>#plcuda_begin</code> and <code>#plcuda_end</code> at the source file eventually constructed.</p>
<p>If external header files are included using <code>#include</code> statement of CUDA C, put the statement on this code block.</p>
<p>The code block between <code>#plcuda_begin</code> and <code>#plcuda_end</code> is embedded to a part of entrypoint function. Therefore, it does not describe function name, arguments definition and so on.
Prior to execution of the code block, the entrypoint function receives arguments of the SQL function from PostgreSQL backend, and set up <code>arg1</code>, <code>arg2</code>, ... variables for further references.</p>
<p>These variables have the following CUDA C representation according to SQL data types.</p>
<table>
<thead>
<tr>
<th align="center">SQL data type</th>
<th align="center">CUDA C data type</th>
<th align="center">Examples</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center"><code>reggstore</code></td>
<td align="center"><code>void *</code></td>
<td align="center">OID of Gstore_fdw foreign table</td>
</tr>
<tr>
<td align="center"><code>real</code></td>
<td align="center"><code>float</code></td>
<td align="center">32bit floating point</td>
</tr>
<tr>
<td align="center"><code>float</code></td>
<td align="center"><code>double</code></td>
<td align="center">64bit floating point</td>
</tr>
<tr>
<td align="center">Other inline data types</td>
<td align="center"><code>Datum</code></td>
<td align="center"><code>int</code>, <code>date</code>, ...</td>
</tr>
<tr>
<td align="center">Fixed-length value by reference</td>
<td align="center"><code>void *</code></td>
<td align="center"><code>uuid</code>, ...</td>
</tr>
<tr>
<td align="center">Variable-length value (varlena)</td>
<td align="center"><code>varlena *</code></td>
<td align="center"><code>text</code>, <code>real[]</code>, ...</td>
</tr>
</tbody>
</table>
<p>PL/CUDA language handler constructs a single CUDA C source file from the code blocks above, then builds it once by <code>nvcc</code> compiler at declaration or execution time.
If it contains any <code>#plcuda_include</code> directive, its source code is not fixed until execution time, so built at the execution time only. In case when identical CUDA program is already pre-built, we can reuse it without rebuild.</p>
<p><img alt="PL/CUDA Callflow" src="../img/plcuda-callflow.png" /></p>
<p>When SQL command invokes PL/CUDA function, PL/CUDA language handler launch the pre-built CUDA program, then copies the arguments of SQL function over pipe. These are stored in the argument buffer of the CUDA program, so custom logic can refer them using <code>arg1</code> or <code>arg2</code> variables.</p>
<p>The data types by reference at CUDA C program, like variable-length datum, are initialized as pointers to the argument buffer. It is a managed memory region allocated by <code>cudaMallocManaged()</code>, these pointers are available without explicit DMA between host system and GPU devices.</p>
<p>Here is a special case if argument has <code>reggstore</code> type. It is actually an OID (32bit integer) of Gstore_Fdw foreign table, however, it is replaced to the reference of GPU device memory acquired by the Gstore_Fdw foreign table if it is supplied as PL/CUDA argument.</p>
<p>The argument is setup to the pointer for <code>GstoreIpcMapping</code> object. <code>GstoreIpcMapping::map</code> holds the mapped address of the GPU device memory acquired by the Gstore_Fdw foreign table.
<code>GstoreIpcHandle::device_id</code> indicates the device-id of the GPU which physically holds the region, and <code>GstoreIpcHandle::rawsize</code> is the raw length of the region.</p>
<pre><code>typedef struct
{
    cl_uint     __vl_len;       /* 4B varlena header */
    cl_short    device_id;      /* GPU device where pinning on */
    cl_char     format;         /* one of GSTORE_FDW_FORMAT__* */
    cl_char     __padding__;    /* reserved */
    cl_long     rawsize;        /* length in bytes */
    union {
#ifdef CU_IPC_HANDLE_SIZE
        CUipcMemHandle      d;  /* CUDA driver API */
#endif
#ifdef CUDA_IPC_HANDLE_SIZE
        cudaIpcMemHandle_t  r;  /* CUDA runtime API */
#endif
        char                data[64];
    } ipc_mhandle;
} GstoreIpcHandle;

typedef struct
{
    GstoreIpcHandle h; /* IPChandle of Gstore_Fdw */
    void       *map;    /* mapped device pointer */
} GstoreIpcMapping;
</code></pre>

<p>PL/CUDA function can return its result using <code>return</code> of a CUDA C datum relevant to the SQL data type.
If no <code>return</code> clause is executed, if a <code>NULL</code> pointer is returned (when the CUDA C data type is a pointer), or if the CUDA program is terminated with status code = 1 by <code>exit(1)</code>, the PL/CUDA function returns <code>null</code> to SQL.</p>
<h1 id="plcuda-references">PL/CUDA References</h1>
<p>This section is a reference for PL/CUDA function's directives and related SQL functions.</p>
<h2 id="advantage-and-disadvantage-of-plcuda">Advantage and disadvantage of PL/CUDA</h2>
<p>On invocation of a PL/CUDA function, it launches the relevant CUDA program on behalf of the invocation, then the CUDA program initializes a per-process context on the GPU device. This series of operations is never lightweight, so we don't recommend implementing a simple comparison of scalar values using PL/CUDA, or using it for a full table scan on billions of rows.</p>
<p>On the other hands, once GPU device is correctly initialized, it allows to process massive amount of data using several thousands of processor cores on GPU device. Especially, it is suitable for computing intensive workloads, like machine-learning or advanced analytics that approach to the optimal values by repeated calculation for example.</p>
<p>According to the growth of data size, we need to pay attention how to exchange data with CUDA program.
PostgreSQL supports array types, and it is easy and simple way to exchange several millions of integer or real values at most.</p>
<p>However, variable-length datum of PostgreSQL, including the array-types, is restricted to 1GB at a maximum.
We need to take a little idea to handle larger data, like separation of data-set. In addition, PostgreSQL backend process set up the argument of SQL functions in single thread, so it takes a certain amount of time to manipulate gigabytes-class memory object.</p>
<p>Please consider usage of Gstore_Fdw foreign-table when data size grows more than several hundreds megabytes.
Once you preload the large data-set onto GPU device memory through Gstore_Fdw, there is no need to set up large arguments on invocation of the PL/CUDA function. It also allows keeping data larger than gigabytes, as long as GPU device memory capacity allows.</p>
<h2 id="plcuda-directives">PL/CUDA Directives</h2>
<h3 id="plcuda_decl"><code>#plcuda_decl</code></h3>
<p>This directive begins a code block which contains CUDA C functions and variables with both of <code>__host__</code> and <code>__device__</code> attributes. PL/CUDA language handler copies this code block in front of the program entrypoint as is.</p>
<p>Use of this directive is optional; however, it makes no sense if there is no declaration of GPU kernel functions to be called from the entrypoint. So, we usually have one or more GPU kernel functions here.</p>
<h3 id="plcuda_begin"><code>#plcuda_begin</code></h3>
<p>This directive begins a code block which forms a part of the entrypoint of the CUDA program.
The CUDA program setup the referable <code>arg1</code>, <code>arg2</code>, ... variables according to the arguments of PL/CUDA function, then switch control to the user defined portion. This code block is a host code; we can implement own control logic working on CPU or heavy calculation by GPU kernel invocation.</p>
<p>Result of PL/CUDA function can be returned using <code>return</code> statement of CUDA C, according to the function definition.</p>
<h3 id="plcuda_end"><code>#plcuda_end</code></h3>
<p>It marks end of the kernel function code block. By the way, if a directive to start code block was put inside of the different code block, the current code block is implicitly closed by the <code>#plcuda_end</code> directive.</p>
<h3 id="plcuda_include-ltfunction-namegt"><code>#plcuda_include &lt;function name&gt;</code></h3>
<p>This directive is similar to <code>#include</code> of CUDA C, however, it injects result of the specified SQL function onto the location where the directive was written.
The SQL function should have identical arguments and return <code>text</code> data.</p>
<p>For example, when we calculate similarity of massive items, we can generate multiple variant of the algorithm on the fly that is almost equivalent but only distance definitions are different. It makes maintenance of PL/CUDA function simplified.</p>
<h3 id="plcuda_library-ltlibrary-namegt"><code>#plcuda_library &lt;library name&gt;</code></h3>
<p>It specifies the library name to be linked when CUDA program is built by <code>nvcc</code>.
The <code>&lt;library name&gt;</code> portion is supplied to <code>nvcc</code> command as <code>-l</code> option.
For example, if the <code>libcublas.so</code> library is linked, you need to describe <code>cublas</code> without the prefix (<code>lib</code>) and suffix (<code>.so</code>).
Right now, we can specify only the libraries installed on the standard library path of the CUDA Toolkit (<code>/usr/local/cuda/lib64</code>).</p>
<h3 id="plcuda_sanity_check-ltfunctiongt"><code>#plcuda_sanity_check &lt;function&gt;</code></h3>
<p>It allows to specify the sanity check function that preliminary checks adequacy of the supplied arguments, prior to GPU kernel launch.
No sanity check function is configured by default.
Usually, launch of GPU kernel function is heavier task than call of another function on CPU, because it also involves initialization of GPU devices. If supplied arguments have unacceptable values from the specification of the PL/CUDA function, a few thousands or millions (or more in some cases) of GPU kernel threads shall be launched just to check the arguments and return an error status. If sanity check can be applied prior to the launch of GPU kernel function with enough small cost, it is a valuable idea to raise an error using sanity check function prior to the GPU kernel function. The sanity check function takes identical arguments with PL/CUDA function, and returns <code>bool</code> data type.</p>
<h2 id="plcuda-related-functions">PL/CUDA Related Functions</h2>
<table>
<thead>
<tr>
<th align="left">Definition</th>
<th align="center">Result</th>
<th align="left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left"><code>plcuda_function_source(regproc)</code></td>
<td align="center"><code>text</code></td>
<td align="left">It returns source code of the GPU kernel generated from the PL/CUDA function, towards the OID input of PL/CUDA function as argument.</td>
</tr>
</tbody>
</table>
<h3 id="support-functions-for-plcuda-invocations">Support functions for PL/CUDA invocations</h3>
<p>The functions below are provided to simplify invocation of PL/CUDA functions.</p>
<table>
<thead>
<tr>
<th align="left">Definition</th>
<th align="center">Result</th>
<th align="left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left"><code>attnums_of(regclass,text[])</code></td>
<td align="center"><code>smallint[]</code></td>
<td align="left">It returns attribute numbers for the column names (may be multiple) of the 2nd argument on the table of the 1st argument.</td>
</tr>
<tr>
<td align="left"><code>attnum_of(regclass,text)</code></td>
<td align="center"><code>smallint</code></td>
<td align="left">It returns attribute number for the column name of the 2nd argument on the table of the 1st argument.</td>
</tr>
<tr>
<td align="left"><code>atttypes_of(regclass,text[])</code></td>
<td align="center"><code>regtype[]</code></td>
<td align="left">It returns data types for the column names (may be multiple) of the 2nd argument on the table of the 1st argument.</td>
</tr>
<tr>
<td align="left"><code>atttype_of(regclass,text)</code></td>
<td align="center"><code>regtype</code></td>
<td align="left">It returns data type for the column name of the 2nd argument on the table of the 1st argument.</td>
</tr>
<tr>
<td align="left"><code>attrs_types_check(regclass,text[],regtype[])</code></td>
<td align="center"><code>bool</code></td>
<td align="left">It checks whether the data types of the columns (may be multiple) of the 2nd argument on the table of the 1st argument match with the data types of the 3rd argument for each.</td>
</tr>
<tr>
<td align="left"><code>attrs_type_check(regclass,text[],regtype)</code></td>
<td align="center"><code>bool</code></td>
<td align="left">It checks whether all the data types of the columns (may be multiple) of the 2nd argument on the table of the 1st argument match with the data type of the 3rd argument.</td>
</tr>
</tbody>
</table>
<h3 id="array-matrix-functions">Array-Matrix Functions</h3>
<p>This section introduces the SQL functions that support the array-based matrix types provided by PG-Strom.</p>
<ul>
<li>2-dimensional Array</li>
<li>Element of array begins from 1 for each dimension</li>
<li>No NULL value is contained</li>
<li>Length of the array is less than 1GB, due to the restriction of variable length datum in PostgreSQL</li>
<li>Array with <code>smallint</code>, <code>int</code>, <code>bigint</code>, <code>real</code> or <code>float</code> data type</li>
</ul>
<p>If and when the array satisfies the above terms, we can uniquely determine the location of the (i,j) element of the array by its index, and this enables a GPU thread to fetch the datum to be processed very efficiently. Also, an array-based matrix packs only the data to be used for calculation, unlike the usual row-based format, so it has an advantage in memory consumption and data transfer.</p>
<table>
<thead>
<tr>
<th align="left">Definition</th>
<th align="center">Result</th>
<th align="left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left"><code>array_matrix(variadic arg, ...)</code></td>
<td align="center"><code>array</code></td>
<td align="left">It is an aggregate function that combines all the rows supplied. For example, when 3 <code>float</code> arguments were supplied by 1000 rows, it returns an array-based matrix of 3 columns X 1000 rows, with <code>float</code> data type.<br>This function is declared to take variable length arguments. The <code>arg</code> takes one or more scalar values of either <code>smallint</code>, <code>int</code>, <code>bigint</code>, <code>real</code> or <code>float</code>. All the arg must have same data types.</td>
</tr>
<tr>
<td align="left"><code>matrix_unnest(array)</code></td>
<td align="center"><code>record</code></td>
<td align="left">It is a set function that extracts the array-based matrix to set of records. <code>array</code> is an array of <code>smallint</code>, <code>int</code>, <code>bigint</code>, <code>real</code> or <code>float</code> data. It returns <code>record</code> type which consists of more than one columns according to the width of matrix. For example, in case of a matrix of 10 columns X 500 rows, each records contains 10 columns with element type of the matrix, then it generates 500 of the records. <br>It is similar to the standard <code>unnest</code> function, but generates <code>record</code> type, thus, it requires to specify the record type to be returned using <code>AS (colname1 type[, ...])</code> clause.</td>
</tr>
<tr>
<td align="left"><code>rbind(array, array)</code></td>
<td align="center"><code>array</code></td>
<td align="left"><code>array</code> is an array of <code>smallint</code>, <code>int</code>, <code>bigint</code>, <code>real</code> or <code>float</code> data. This function combines the supplied two matrices vertically. Both matrices need to have the same element data type. If the widths of the matrices are not equivalent, it fills up the padding area with zeros.</td>
</tr>
<tr>
<td align="left"><code>rbind(array)</code></td>
<td align="center"><code>array</code></td>
<td align="left"><code>array</code> is an array of <code>smallint</code>, <code>int</code>, <code>bigint</code>, <code>real</code> or <code>float</code> data. This function is similar to <code>rbind(array, array)</code>, but performs as an aggregate function, then combines all the input matrices into one result vertically.</td>
</tr>
<tr>
<td align="left"><code>cbind(array, array)</code></td>
<td align="center"><code>array</code></td>
<td align="left"><code>array</code> is an array of <code>smallint</code>, <code>int</code>, <code>bigint</code>, <code>real</code> or <code>float</code> data. This function combines the supplied two matrices horizontally. Both matrices need to have the same element data type. If the heights of the matrices are not equivalent, it fills up the padding area with zeros.</td>
</tr>
<tr>
<td align="left"><code>cbind(array)</code></td>
<td align="center"><code>array</code></td>
<td align="left"><code>array</code> is an array of <code>smallint</code>, <code>int</code>, <code>bigint</code>, <code>real</code> or <code>float</code> data. This function is similar to cbind(array, array), but performs as an aggregate function, then combines all the input matrices into one result horizontally.</td>
</tr>
<tr>
<td align="left"><code>transpose(array)</code></td>
<td align="center"><code>array</code></td>
<td align="left"><code>array</code> is an array of <code>smallint</code>, <code>int</code>, <code>bigint</code>, <code>real</code> or <code>float</code> data. This function makes a transposed matrix that swaps height and width of the supplied matrix.</td>
</tr>
<tr>
<td align="left"><code>array_matrix_validation(anyarray)</code></td>
<td align="center"><code>bool</code></td>
<td align="left">It validates whether the supplied array (<code>anyarray</code>) is adequate for the array-based matrix. It is intended to use for sanity check prior to invocation of PL/CUDA function, or check constraint on domain type definition.</td>
</tr>
<tr>
<td align="left"><code>array_matrix_height(array)</code></td>
<td align="center"><code>int</code></td>
<td align="left"><code>array</code> is an array of either <code>smallint</code>, <code>int</code>, <code>bigint</code>, <code>real</code> or <code>float</code> data. This function returns the height of the supplied matrix.</td>
</tr>
<tr>
<td align="left"><code>array_matrix_width(array)</code></td>
<td align="center"><code>int</code></td>
<td align="left"><code>array</code> is an array of either <code>smallint</code>, <code>int</code>, <code>bigint</code>, <code>real</code> or <code>float</code> data. This function returns the width of the supplied matrix.</td>
</tr>
</tbody>
</table>
              
            </div>
          </div>
          <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="../ref_types/" class="btn btn-neutral float-right" title="Data Types">Next <span class="icon icon-circle-arrow-right"></span></a>
      
      
        <a href="../gstore_fdw/" class="btn btn-neutral" title="Gstore_fdw"><span class="icon icon-circle-arrow-left"></span> Previous</a>
      
    </div>
  

  <hr/>

  <div role="contentinfo">
    <!-- Copyright etc -->
    
  </div>

  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
      
        </div>
      </div>

    </section>

  </div>

  <div class="rst-versions" role="note" style="cursor: pointer">
    <span class="rst-current-version" data-toggle="rst-current-version">
      
      
        <span><a href="../gstore_fdw/" style="color: #fcfcfc;">&laquo; Previous</a></span>
      
      
        <span style="margin-left: 15px"><a href="../ref_types/" style="color: #fcfcfc">Next &raquo;</a></span>
      
    </span>
</div>
    <script>var base_url = '..';</script>
    <script src="../js/theme.js" defer></script>
      <script src="../search/main.js" defer></script>

</body>
</html>
